'''
    This program will place each college's graduate quality in the U.S. distribution
    of college graduate quality.
'''

import sys
import pandas as pd
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import csv
import statsmodels.api as sm
import statsmodels.formula.api as smf
from datetime import datetime
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
import gc 

from statsmodels.distributions.empirical_distribution import ECDF

fileSeed = "C:/Users/jsock/Dropbox/Research/GD/International/Replication/"
inputPath = fileSeed + "/Estimates/"

##############################################################################
# Read in estimates for college graduate quality and get country distributions
##############################################################################

# One row per school; the columns used below are 'school',
# 'university_country_iso' and 'q_j'.
df = pd.read_csv(inputPath + 'School_q_j_domestic_only.csv')

# Reference sample: the schools whose country code is 'USA'.
dfUSA = df[df.university_country_iso == 'USA']

# Build the empirical CDF from the non-missing U.S. values only.  ECDF counts
# every element it is given, so a NaN left in the sample would inflate the
# number of observations and push every finite school's percentile down.
usa_q_j = dfUSA['q_j'].dropna()
if usa_q_j.empty:
    raise ValueError(
        "No non-missing q_j values for university_country_iso == 'USA'; "
        "cannot build the U.S. distribution."
    )

ecdf_usa = ECDF(usa_q_j.to_numpy())

# Evaluate the step function on the whole column in one vectorized call
# instead of once per row.  A missing q_j would otherwise be looked up as if
# it were larger than every U.S. value and come back as 1.0, so those rows are
# explicitly kept as NaN.
q_j_missing = df['q_j'].isna()
df['fit_ecdf_usa'] = np.where(
    q_j_missing,
    np.nan,
    ecdf_usa(df['q_j'].to_numpy()),
)

# Keep only the identifying columns, the estimate, and its U.S. percentile.
df = df[['school', 'university_country_iso', 'q_j', 'fit_ecdf_usa']]

# The DataFrame index is written as the leading unnamed column (pandas
# default), which keeps the output layout the same as before.
df.to_csv(inputPath + "School_q_j_distribution_relative_US.csv")






